import { FastifyReply } from "fastify";
import { BrowserContext, Page, HTTPResponse } from "puppeteer-core";
import { CDPService } from "../../services/cdp/cdp.service.js";
import { SessionService } from "../../services/session.service.js";
import { ScrapeFormat } from "../../types/index.js";
import { getErrors } from "../../utils/errors.js";
import { updateLog } from "../../utils/logging.js";
import { IProxyServer } from "../../utils/proxy.js";
import {
  cleanHtml,
  getDefuddleContent,
  htmlToMarkdown,
  transformHtml,
} from "../../utils/scrape/index.js";
import { normalizeUrl } from "../../utils/url.js";
import { PDFRequest, ScrapeRequest, ScreenshotRequest, SearchRequest } from "./actions.schema.js";
import { DefuddleResponse } from "defuddle";
import pdf2html from "pdf2html";
import {
  buildHtmlLikeMetadataFromPdf,
  extractLinksFromConvertedHtml,
} from "../../utils/scrape/pdfToHtml.js";
import { safeGoto } from "../../utils/scrape/safeGoTo.js";

/**
 * Scrapes a page and replies with its content, metadata and links.
 *
 * When `url` is provided the page is navigated there first; otherwise whatever
 * the page currently shows is scraped. Navigations that resolve to a PDF are
 * re-downloaded with Node `fetch` (forwarding the page's cookies) and converted
 * to HTML so the same format pipeline can be applied.
 *
 * Requested `format` entries decide which keys of `content` are filled
 * (`html`, `cleaned_html`, `readability`, `markdown`); with no format the raw
 * HTML is returned. `screenshot` / `pdf` add base64-encoded captures.
 *
 * @param sessionService - Used to create the local proxy server when `proxyUrl` is set.
 * @param browserService - Browser wrapper that provides the page (primary page or proxied context).
 * @param request - Scrape request; options are read from `request.body`.
 * @param reply - Fastify reply; receives the scrape result, or a 500 with `{ message }` on failure.
 */
export const handleScrape = async (
  sessionService: SessionService,
  browserService: CDPService,
  request: ScrapeRequest,
  reply: FastifyReply,
) => {
  const startTime = Date.now();
  const times: Record<string, number> = {};
  const { url, format, screenshot, pdf, proxyUrl, logUrl, delay } = request.body;

  let proxy: IProxyServer | null = null;
  let context: BrowserContext | null = null;

  try {
    if (proxyUrl) {
      proxy = await sessionService.proxyFactory(proxyUrl);
      await proxy.listen();
    }
    times.proxyTime = Date.now() - startTime;

    if (!browserService.isRunning()) {
      await browserService.launch();
    }

    let page: Page;
    if (proxy) {
      // Proxied requests get their own browser context; it is closed in `finally`.
      // Note that the PDF download below uses plain Node fetch and does not go
      // through the proxy (that would require a proxy-aware HTTP agent).
      context = await browserService.createBrowserContext(proxy.url);
      page = await context.newPage();
      times.proxyPageTime = Date.now() - startTime - times.proxyTime;
    } else {
      page = await browserService.getPrimaryPage();
      times.pageTime = Date.now() - startTime - times.proxyTime;
    }

    let normalizedUrl: string | null = null;
    if (url) {
      normalizedUrl = normalizeUrl(url);
      if (!normalizedUrl) {
        throw new Error(`Invalid URL: ${url}`);
      }
    }

    const navigationStart = Date.now();
    const navigation = normalizedUrl
      ? await safeGoto(page, normalizedUrl, {
          timeout: 30000,
          waitUntil: "domcontentloaded",
        })
      : { response: null, isPdf: false, pdfResponse: null };
    if (normalizedUrl) {
      // Recorded explicitly so the log contains the navigation cost on its own.
      times.pageLoadTime = Date.now() - navigationStart;
    }

    const response: HTTPResponse | null =
      navigation.response !== null ? navigation.response : navigation.pdfResponse;

    if (delay) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }

    const contentType = response?.headers()["content-type"]?.toLowerCase() || "";

    let scrapeResponse: Record<string, any> = {};
    let htmlContent = "";

    if (navigation.isPdf || contentType.includes("application/pdf")) {
      if (!normalizedUrl) {
        // Not reachable in practice: PDF detection requires a navigation, which requires a URL.
        throw new Error("PDF content detected without a target URL");
      }
      const targetUrl = normalizedUrl;

      // Download the PDF with Node fetch, forwarding the page's cookies so the
      // request carries the same authentication state as the browser.
      const cookies = await page.cookies(targetUrl);
      const cookieHeader = cookies.map((c) => `${c.name}=${c.value}`).join("; ");
      const fetchHeaders: Record<string, string> = {
        Referer: new URL(targetUrl).origin + "/",
      };
      if (cookieHeader) {
        fetchHeaders["Cookie"] = cookieHeader;
      }

      const nodeRes = await fetch(targetUrl, {
        method: "GET",
        redirect: "follow",
        headers: fetchHeaders,
      });
      const nodeCT = (nodeRes.headers.get("content-type") || "").toLowerCase();
      if (!nodeRes.ok || !nodeCT.includes("application/pdf")) {
        throw new Error(`Expected PDF; got status ${nodeRes.status} content-type ${nodeCT}`);
      }
      const pdfBuffer = Buffer.from(await nodeRes.arrayBuffer());

      const convertStart = Date.now();
      htmlContent = await pdf2html.html(pdfBuffer);
      times.pdfHtmlConvertTime = Date.now() - convertStart;

      const metaStart = Date.now();
      const pdfMeta = await pdf2html.meta(pdfBuffer);
      times.pdfMetaTime = Date.now() - metaStart;

      const htmlMeta = buildHtmlLikeMetadataFromPdf(pdfMeta, {
        urlSource: targetUrl,
        statusCode: nodeRes.status,
        htmlForFallback: htmlContent,
      });

      scrapeResponse = {
        content: {},
        metadata: {
          ...htmlMeta,
          statusCode: nodeRes.status,
          headers: Object.fromEntries(nodeRes.headers.entries()),
          originalContentType: nodeCT,
          pdfAcquisition: "node-fetch-with-cookies",
        },
        links: extractLinksFromConvertedHtml(htmlContent),
      };

      if (pdf) {
        scrapeResponse.pdf = pdfBuffer.toString("base64");
      }
    } else {
      // Regular HTML flow
      const extractionStart = Date.now();

      // The extractor below is serialized into the page. Build tooling may wrap
      // named functions in a `__name(...)` helper that does not exist there, so
      // an identity stand-in is installed first.
      await page.evaluate(() => {
        (window as any).__name = (func: Function) => func;
      });

      const [{ html, metadata, links }, base64Screenshot, pdfBuffer] = await Promise.all([
        page.evaluate(() => {
          const getMetaContent = (selector: string) => {
            const element = document.querySelector(selector);
            return element ? element.getAttribute("content") : null;
          };
          const getMetaByName = (name: string) => getMetaContent(`meta[name="${name}"]`);
          const getMetaByProperty = (property: string) =>
            getMetaContent(`meta[property="${property}"]`);

          // Collects every parseable JSON-LD script; malformed ones are logged and skipped.
          const extractJsonLd = () => {
            const scripts = document.querySelectorAll('script[type="application/ld+json"]');
            const jsonLdData: any[] = [];
            scripts.forEach((script) => {
              try {
                const data = JSON.parse(script.textContent || "");
                jsonLdData.push(data);
              } catch (e) {
                console.error(e);
              }
            });
            return jsonLdData;
          };

          return {
            html: document.documentElement.outerHTML,
            links: [...document.links].map((l) => ({
              url: l.href,
              text: l.textContent?.trim() || "",
            })),
            metadata: {
              title: document.title,
              language: document.documentElement.lang,
              urlSource: window.location.href,
              timestamp: new Date().toISOString(),

              description: getMetaByName("description"),
              keywords: getMetaByName("keywords"),
              author: getMetaByName("author"),

              ogTitle: getMetaByProperty("og:title"),
              ogDescription: getMetaByProperty("og:description"),
              ogImage: getMetaByProperty("og:image"),
              ogUrl: getMetaByProperty("og:url"),
              ogSiteName: getMetaByProperty("og:site_name"),

              articleAuthor: getMetaByProperty("article:author"),
              publishedTime: getMetaByProperty("article:published_time"),
              modifiedTime: getMetaByProperty("article:modified_time"),

              canonical: document.querySelector('link[rel="canonical"]')?.getAttribute("href"),
              favicon: document.querySelector('link[rel="icon"]')?.getAttribute("href"),

              jsonLd: extractJsonLd(),
            },
          };
        }),
        screenshot ? page.screenshot({ encoding: "base64", type: "jpeg", quality: 100 }) : null,
        pdf ? page.pdf() : null,
      ]);

      htmlContent = html;
      times.extractionTime = Date.now() - extractionStart;

      scrapeResponse = {
        content: {},
        metadata: {
          ...metadata,
          // Report the real navigation status; 200 is only the fallback when no
          // navigation response is available (e.g. no URL was requested).
          statusCode: response?.status() ?? 200,
        },
        links,
      };

      if (base64Screenshot) {
        scrapeResponse.screenshot = base64Screenshot;
      }
      if (pdfBuffer) {
        scrapeResponse.pdf = Buffer.from(pdfBuffer).toString("base64");
      }
    }

    // Format handling (works for both PDF converted HTML and normal HTML)
    if (format && format.length > 0) {
      if (format.includes(ScrapeFormat.HTML)) {
        scrapeResponse.content.html = htmlContent;
      }

      if (format.includes(ScrapeFormat.CLEANED_HTML)) {
        const cleanHtmlStart = Date.now();
        scrapeResponse.content.cleaned_html = cleanHtml(htmlContent);
        times.cleanedHtmlTime = Date.now() - cleanHtmlStart;
      }

      const wantsReadability = format.includes(ScrapeFormat.READABILITY);
      const wantsMarkdown = format.includes(ScrapeFormat.MARKDOWN);

      // Markdown is produced from the readability output, so both formats share one extraction.
      if (wantsReadability || wantsMarkdown) {
        const readabilityStart = Date.now();
        const readabilityContent: DefuddleResponse = await getDefuddleContent(
          transformHtml(htmlContent, normalizedUrl || url),
        );
        times.readabilityTime = Date.now() - readabilityStart;

        if (wantsReadability) {
          scrapeResponse.content.readability = readabilityContent.content;
        }

        if (wantsMarkdown) {
          const markdownStart = Date.now();
          scrapeResponse.content.markdown = await htmlToMarkdown(readabilityContent.content);
          times.markdownTime = Date.now() - markdownStart;
        }
      }
    } else {
      scrapeResponse.content.html = htmlContent;
    }

    times.totalInstanceTime = Date.now() - startTime;

    if (logUrl) {
      await updateLog(logUrl, { times });
    }

    return reply.send(scrapeResponse);
  } catch (e: unknown) {
    const error = getErrors(e);

    if (logUrl) {
      await updateLog(logUrl, { times, response: { browserError: error } });
    }

    if (url) {
      await browserService.refreshPrimaryPage();
    }
    return reply.code(500).send({ message: error });
  } finally {
    // Best-effort cleanup: failures while closing must not mask the response.
    if (context) {
      await context.close().catch(() => {});
    }
    if (proxy) {
      await proxy.close(true).catch(() => {});
    }
  }
};

/**
 * Runs a Brave web search for `query` and replies with `{ results }`, where each
 * result has a `title`, a `url` and an optional `description`.
 *
 * @param sessionService - Used to create the local proxy server when `proxyUrl` is set.
 * @param browserService - Browser wrapper that provides the page (primary page or proxied context).
 * @param request - Search request; `query`, `proxyUrl` and `logUrl` are read from `request.body`.
 * @param reply - Fastify reply; receives the results, or a 500 with `{ message }` on failure.
 */
export const handleSearch = async (
  sessionService: SessionService,
  browserService: CDPService,
  request: SearchRequest,
  reply: FastifyReply,
) => {
  const startTime = Date.now();
  const times: Record<string, number> = {};
  const { query, proxyUrl, logUrl } = request.body;

  let proxy: IProxyServer | null = null;
  let context: BrowserContext | null = null;

  try {
    if (proxyUrl) {
      proxy = await sessionService.proxyFactory(proxyUrl);
      await proxy.listen();
    }
    times.proxyTime = Date.now() - startTime;

    if (!browserService.isRunning()) {
      await browserService.launch();
    }

    let page: Page;
    if (proxy) {
      // Proxied requests get their own browser context; it is closed in `finally`.
      context = await browserService.createBrowserContext(proxy.url);
      page = await context.newPage();
      times.proxyPageTime = Date.now() - startTime - times.proxyTime;
    } else {
      page = await browserService.getPrimaryPage();
      times.pageTime = Date.now() - startTime - times.proxyTime;
    }

    // Go to Brave
    await page.goto(`https://search.brave.com/search?q=${encodeURIComponent(query)}`, {
      waitUntil: "networkidle2",
    });

    // Wait for results to load
    await page.waitForSelector("#results");

    // Install the identity `__name` stand-in on the results document. It has to
    // happen after navigation: anything set on `window` beforehand is discarded
    // when the new document loads.
    await page.evaluate(() => {
      (window as any).__name = (func: Function) => func;
    });

    // Scrape results
    const results = await page.evaluate(() => {
      // Snippet ids that are not regular web results.
      const skippedIds = [
        "llm-snippet",
        "faq",
        "pagination-snippet",
        "search-elsewhere",
        "infoblox-snippet",
        "discussions",
      ];

      const collected: { title: string; url: string; description: string | null }[] = [];

      for (const item of Array.from(document.querySelectorAll("div.snippet"))) {
        if (skippedIds.includes(item.id)) {
          continue;
        }

        const title =
          item.querySelector("div.result-content a div.title")?.textContent?.trim() || null;
        const url = item.querySelector("div.result-content a")?.getAttribute("href") || null;

        // Entries without both a title and a link are not usable results.
        if (title === null || url === null) {
          continue;
        }

        // NOTE(review): only the text between the first and second "-" is kept —
        // presumably to drop a leading date; descriptions that contain hyphens are
        // truncated and ones without any hyphen become null. Confirm this is intended.
        const description =
          item.querySelector("div.generic-snippet")?.textContent?.split("-")[1]?.trim() || null;

        collected.push({ title, url, description });
      }

      return collected;
    });
    times.totalInstanceTime = Date.now() - startTime;

    if (logUrl) {
      await updateLog(logUrl, { times });
    }

    return reply.send({ results });
  } catch (e: unknown) {
    const error = getErrors(e);

    if (logUrl) {
      await updateLog(logUrl, { times, response: { browserError: error } });
    }

    return reply.code(500).send({ message: error });
  } finally {
    // Best-effort cleanup: failures while closing must not mask the response.
    if (context) {
      await context.close().catch(() => {});
    }
    if (proxy) {
      await proxy.close(true).catch(() => {});
    }
  }
};

/**
 * Captures a JPEG screenshot of a page and sends the image bytes as the reply.
 *
 * When `url` is provided the page is navigated there first; otherwise the page
 * is captured as it currently is.
 *
 * Timings reported to `logUrl` (milliseconds):
 * - `proxyTime`: proxy setup
 * - `pageTime` / `proxyPageTime`: browser launch (if needed) plus page acquisition
 * - `pageLoadTime`: navigation, only present when `url` is given
 * - `screenshotTime`: optional delay plus the capture itself
 *
 * @param sessionService - Used to create the local proxy server when `proxyUrl` is set.
 * @param browserService - Browser wrapper that provides the page (primary page or proxied context).
 * @param request - Screenshot request; options are read from `request.body`.
 * @param reply - Fastify reply; receives the image, or a 500 with `{ message }` on failure.
 */
export const handleScreenshot = async (
  sessionService: SessionService,
  browserService: CDPService,
  request: ScreenshotRequest,
  reply: FastifyReply,
) => {
  const startTime = Date.now();
  const times: Record<string, number> = {};
  const { url, logUrl, proxyUrl, delay, fullPage } = request.body;

  let proxy: IProxyServer | null = null;
  let context: BrowserContext | null = null;

  try {
    if (proxyUrl) {
      proxy = await sessionService.proxyFactory(proxyUrl);
      await proxy.listen();
    }
    times.proxyTime = Date.now() - startTime;

    // Launching inside the try block routes launch failures through the same
    // logging / 500 path as every other error.
    if (!browserService.isRunning()) {
      await browserService.launch();
    }

    let page: Page;
    if (proxy) {
      // Proxied requests get their own browser context; it is closed in `finally`.
      context = await browserService.createBrowserContext(proxy.url);
      page = await context.newPage();
      times.proxyPageTime = Date.now() - startTime - times.proxyTime;
    } else {
      page = await browserService.getPrimaryPage();
      times.pageTime = Date.now() - startTime - times.proxyTime;
    }

    // Each phase is timed from its own marker, so a phase that did not run
    // (no proxy, no URL) can never turn a later timing into NaN.
    let phaseStart = Date.now();

    if (url) {
      const normalizedUrl = normalizeUrl(url);
      if (!normalizedUrl) {
        throw new Error(`Invalid URL: ${url}`);
      }
      await page.goto(normalizedUrl, { timeout: 30000, waitUntil: "domcontentloaded" });

      const loadedAt = Date.now();
      times.pageLoadTime = loadedAt - phaseStart;
      phaseStart = loadedAt;
    }

    if (delay) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }

    const screenshot = await page.screenshot({ fullPage, type: "jpeg", quality: 100 });
    times.screenshotTime = Date.now() - phaseStart;

    if (logUrl) {
      await updateLog(logUrl, { times });
    }

    return reply.send(screenshot);
  } catch (e: unknown) {
    const error = getErrors(e);

    if (logUrl) {
      await updateLog(logUrl, { times, response: { browserError: error } });
    }

    if (url) {
      await browserService.refreshPrimaryPage();
    }

    return reply.code(500).send({ message: error });
  } finally {
    // Best-effort cleanup: failures while closing must not mask the response.
    if (context) {
      await context.close().catch(() => {});
    }
    if (proxy) {
      await proxy.close(true).catch(() => {});
    }
  }
};

/**
 * Renders a page to PDF and sends the PDF bytes as the reply.
 *
 * When `url` is provided the page is navigated there first; otherwise the page
 * is rendered as it currently is.
 *
 * Timings reported to `logUrl` (milliseconds):
 * - `proxyTime`: proxy setup
 * - `pageTime` / `proxyPageTime`: browser launch (if needed) plus page acquisition
 * - `pageLoadTime`: navigation, only present when `url` is given
 * - `pdfTime`: optional delay plus the PDF rendering itself
 *
 * @param sessionService - Used to create the local proxy server when `proxyUrl` is set.
 * @param browserService - Browser wrapper that provides the page (primary page or proxied context).
 * @param request - PDF request; options are read from `request.body`.
 * @param reply - Fastify reply; receives the PDF, or a 500 with `{ message }` on failure.
 */
export const handlePDF = async (
  sessionService: SessionService,
  browserService: CDPService,
  request: PDFRequest,
  reply: FastifyReply,
) => {
  const startTime = Date.now();
  const times: Record<string, number> = {};
  const { url, logUrl, proxyUrl, delay } = request.body;

  let proxy: IProxyServer | null = null;
  let context: BrowserContext | null = null;

  try {
    if (proxyUrl) {
      proxy = await sessionService.proxyFactory(proxyUrl);
      await proxy.listen();
    }
    times.proxyTime = Date.now() - startTime;

    // Launching inside the try block routes launch failures through the same
    // logging / 500 path as every other error.
    if (!browserService.isRunning()) {
      await browserService.launch();
    }

    let page: Page;
    if (proxy) {
      // Proxied requests get their own browser context; it is closed in `finally`.
      context = await browserService.createBrowserContext(proxy.url);
      page = await context.newPage();
      times.proxyPageTime = Date.now() - startTime - times.proxyTime;
    } else {
      page = await browserService.getPrimaryPage();
      times.pageTime = Date.now() - startTime - times.proxyTime;
    }

    // Each phase is timed from its own marker, so a phase that did not run
    // (no proxy, no URL) can never turn a later timing into NaN.
    let phaseStart = Date.now();

    if (url) {
      const normalizedUrl = normalizeUrl(url);
      if (!normalizedUrl) {
        throw new Error(`Invalid URL: ${url}`);
      }
      await page.goto(normalizedUrl, { timeout: 30000, waitUntil: "domcontentloaded" });

      const loadedAt = Date.now();
      times.pageLoadTime = loadedAt - phaseStart;
      phaseStart = loadedAt;
    }

    if (delay) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }

    const pdf = await page.pdf();
    times.pdfTime = Date.now() - phaseStart;

    if (logUrl) {
      await updateLog(logUrl, { times });
    }

    return reply.send(pdf);
  } catch (e: unknown) {
    const error = getErrors(e);

    if (logUrl) {
      await updateLog(logUrl, { times, response: { browserError: error } });
    }

    if (url) {
      await browserService.refreshPrimaryPage();
    }

    return reply.code(500).send({ message: error });
  } finally {
    // Best-effort cleanup: failures while closing must not mask the response.
    if (context) {
      await context.close().catch(() => {});
    }
    if (proxy) {
      await proxy.close(true).catch(() => {});
    }
  }
};
